
* Work from the do-file directory and load the stage-1 panel
cd "$do"
use "$temp\Full_Panel_1.dta", clear

* Shift the year variable back by one
replace year = year - 1



********************************************************************************************
* 1. Construct Panel with Complete Information, Recode Variables
********************************************************************************************



* Indicator for whether a record belongs to the selected sample
* (starts at 1 for everyone; later restrictions set it to 0).
* NOTE: this header must be a plain "*" comment. "///" is Stata's
* line-continuation marker, so a header written with "///" splices the
* following command onto the comment line instead of just annotating it.
gen sample_select = 1

* Drop pre-1986 SURVEY data (pre-1985 ref year)
replace sample_select = 0 if year < 1985


* Veteran status cleaning: code 9 becomes missing, code 5 becomes 0
replace veteran = . if veteran == 9
replace veteran = 0 if veteran == 5

*********************** State of residence ***********************
* Impute state where it is missing.

* State codes 0 and 99 are treated as missing
replace state = . if inlist(state, 0, 99)

* Exclude persons whose state is missing in every one of their records
egen nms  = sum(state==.),  by(person)
egen nobs = sum(person!=.), by(person)
replace sample_select = 0 if nms==nobs
drop nms nobs

* (this count is recomputed after the imputation below; it is not used before then)
egen nms = sum(state==.), by(person)
gen new_place = moved==1

* Forward fill: a record flagged as not moved takes the state of the
* previous record of the same person
sort person year
quietly by person: replace state = state[_n-1] if state==. & new_place==0

* Backward fill: with years in descending order, [_n-1] is the following
* record; if that record is flagged as not moved, its state applies here too
gsort person -year
quietly by person: replace state = state[_n-1] if state==. & new_place[_n-1]==0

sort person year

* Anyone who still has a missing state after imputation is excluded
drop nms
egen nms = sum(state==.), by(person)

replace sample_select = 0 if nms>0

* Rebuild region from the (imputed) state code
drop region

* North East
gen     region = 1 if inlist(state, 6, 18, 20, 28, 29, 31, 37, 38, 44)
* Midwest
replace region = 2 if inlist(state, 12, 13, 14, 15, 21, 22, 24, 26, 33, 34, 40, 48)
* South
replace region = 3 if inlist(state, 1, 3, 7, 8, 9, 10, 16, 17, 19, 23, 32, 35, 39, 41, 42, 45, 47)
* West
replace region = 4 if inlist(state, 2, 4, 5, 11, 25, 27, 30, 36, 43, 46, 49, 50, 51)
****************************************************

* Give each person a single sex value: the mode across their records,
* with ties resolved toward the smallest code (minmode)
bysort person: egen sex2 = mode(sex), minmode
drop sex
rename sex2 sex

* Exclude persons whose sex code is not 1, and the latino sample
replace sample_select = 0 if sex != 1 | latino == 1

* Recode employment status (empst, and the w-prefixed counterpart wempst).
* Each is built from three raw reports <stem>1-<stem>3. Codes are assigned
* from 7 down to 1, so the smallest code found in any report is the one kept.
foreach stem in empst wempst {
    gen `stem' = .
    gen temp`stem' = .
    forvalues code = 7(-1)1 {
        replace temp`stem' = `code' if inlist(`code', `stem'1, `stem'2, `stem'3)
    }
    * No status when the first report is missing
    replace temp`stem' = . if `stem'1 == .
    replace `stem' = temp`stem'
}


* Recode education into three groups (1/2/3) that are consistent across waves.
* The same mapping is applied to educ and to the w-prefixed weduc.
foreach v in educ weduc {
    gen temp`v' = .

    * Years before 1990: use the categorical variable (`v'_cat)
    replace temp`v' = 1 if inrange(`v'_cat, 0, 3) & year < 1990
    replace temp`v' = 2 if inlist(`v'_cat, 4, 5)  & year < 1990
    replace temp`v' = 3 if inrange(`v'_cat, 6, 8) & year < 1990
    replace temp`v' = . if `v'_cat == 9           & year < 1990

    * 1990 onward: use the numeric variable (0-11, 12, above 12 up to 17)
    replace temp`v' = 1 if inrange(`v', 0, 11)    & year >= 1990
    replace temp`v' = 2 if `v' == 12              & year >= 1990
    replace temp`v' = 3 if `v' > 12 & `v' <= 17   & year >= 1990
    replace temp`v' = . if `v' > 17               & year >= 1990

    replace `v' = temp`v'
}


* Assign each person the highest group observed in any of their records;
* persons with no education information at all are excluded
egen eductemp = max(educ), by(person)
replace educ = eductemp
replace sample_select = 0 if eductemp == .



* Same for weduc, grouped by sp_person (no sample restriction applied).
* NOTE(review): records with a missing sp_person form a single group here - confirm that is harmless.
egen weductemp = max(weduc), by(sp_person)
replace weduc = weductemp



* Family composition change, relative to fchg==0:
*   fam_change_other  = 1 when fchg==1
*   fam_change_spouse = 1 when fchg==2
* (missing for every other value of fchg)
gen fam_change_other  = (fchg == 1) if inlist(fchg, 0, 1)
gen fam_change_spouse = (fchg == 2) if inlist(fchg, 0, 2)

* Firm-specific experience in years, from years, months and weeks on the job.
* NOTE(review): months are converted at 4 weeks each, so 12 months count as 48/52 of a year - confirm this is intended.
gen experience = (52*exp_job_years + 4*exp_job_months + exp_job_weeks)/52

* Dummy: status 1 versus status 2 or 3 (missing for any other status)
gen full_time  = (empst == 1)  if inlist(empst, 1, 2, 3)
gen wfull_time = (wempst == 1) if inlist(wempst, 1, 2, 3)

* Dummy: status 1-3 versus status 4-8 (missing otherwise)
gen full_time2  = inlist(empst, 1, 2, 3)  if inlist(empst, 1, 2, 3)  | inrange(empst, 4, 8)
gen wfull_time2 = inlist(wempst, 1, 2, 3) if inlist(wempst, 1, 2, 3) | inrange(wempst, 4, 8)

* Combine unempl and layoff.
* unempl2 = 1 when unempl is 0 and layoff is 0 or missing;
* unempl2 = 0 when either unempl or layoff equals 1 (this assignment comes last, so it takes precedence).
gen unempl2 = 1 if unempl == 0 & (layoff == 0 | layoff == .)
replace unempl2 = 0 if unempl == 1 | layoff == 1


* Drop those with always missing info on age.
* Ages of 0 or above 110 are first set to missing.
replace age=. if age ==0 | age >110
* n = number of records per person, na = number of those with missing age.
* (The age==0 test in na is redundant: zeros were set to missing just above.)
egen n=sum(person!=.), by(person)
egen na=sum(age==. | age==0), by(person)
tabstat na, by(na) stat(N)
* Exclude persons whose age is missing in every record
replace sample_select=0 if n==na
drop n na

* Same cleaning and counts for agew, grouped by sp_person.
* The counts are only tabulated; no sample restriction is applied here.
replace agew=. if agew==0 | agew>110
egen n=sum(person!=.),by(sp_person)
egen na=sum(agew==.),by(sp_person)
tabstat na, by(na) stat(N)

drop n na

* Recode age so that there is no gap or jump:
* take the age reported in the person's last observed year, derive the
* implied offset yb = year - age from that record, and rebuild age in
* every year as year - yb.
egen lasty=max(year), by(person)
replace sample_select=0 if lasty==.
gen lastage=age if year==lasty
* b is non-missing only in the last-year record, so the sum over the person picks up that one value
* NOTE(review): this assumes one record per person in the last year - confirm.
gen b=year-lastage
egen yb=sum(b),by(person)
* egen sum() returns 0 when every b is missing (age missing in the last year);
* that case is turned back into missing, which makes age missing in all years for that person
replace yb=. if yb==0
replace age=year-yb
drop lasty lastage b

* Same reconstruction for agew by sp_person. Unlike the block above, the
* anchor year here is the last year in which agew is non-missing.
egen lasty=max(year) if agew!=., by(sp_person)

gen lastage=agew if year==lasty
gen b=year-lastage
egen ybw=sum(b),by(sp_person)
replace ybw=. if ybw==0
replace agew=year-ybw

drop lasty lastage b

* Takes into account the retrospective nature of the data
replace age=age-1
replace agew=agew-1

* sp_birthy: most frequent value of year - agew within sp_person
* (ties resolved toward the largest value, maxmode)
gen temp = year-agew if agew != .
bysort sp_person: egen sp_birthy = mode(temp), maxmode
drop temp


* Exclude records with a zero or missing race code
replace sample_select = 0 if race == 0 | race == .

* Recode newborn to a 0/1 dummy: 1 stays 1, values 2-20 become 0,
* anything else becomes missing
gen newborntemp = (newborn == 1) if newborn == 1 | inrange(newborn, 2, 20)
replace newborn = newborntemp

* Age-group dummies: 1 for ages [25,43), 0 for ages [43,60), missing otherwise
gen agebin  = (age < 43)  if age >= 25  & age < 60
gen agewbin = (agew < 43) if agew >= 25 & agew < 60







********************************************************************************************
* 2. Prepare for NBER's TAXSIM Package:
***  TAXSIM needs a value for EVERY input variable or it will not run (set an input to 0 when we have no data for it).
********************************************************************************************

* Sequential row identifier (1..N) used to merge the TAXSIM output back
* onto the panel. Stored as long: the default float type cannot represent
* integers exactly above 16,777,216, which would break the 1:1 merge key.
gen long taxsimid = _n

* Everything between preserve and restore works on a throw-away copy that
* is reduced to the TAXSIM input variables; the full panel comes back at restore.
preserve
drop if latino==1 /*Don't have key income measures*/
*drop if sample_select==0
*replace year = year-1 /* Account for retrospective nature of income data */

* Generate TAXSIM compatible state codes from PSID codes:
* code 1 is unchanged, 2-10 shift up by one, 11-49 shift up by two,
* and PSID codes 50 and 51 map to 2 and 12.
gen state2=.
replace state2=1 if state ==1
replace state2=state+1 if state >= 2 & state <=10
replace state2=state+2 if state >=11 & state <=49
replace state2 = 2 if state ==50
replace state2 = 12 if state ==51
replace state=state2
* Unknown state is sent as 0
replace state = 0 if state == .

* Marital status is fixed at 2 for every record
* NOTE(review): 2 is presumably TAXSIM's joint-filing code - confirm against the TAXSIM input documentation.
gen mstat =2

gen page=age /* Head age */
replace page = 0 if page == . | page < 0

gen sage =agew /* Wife age */
replace sage = 0 if sage == . | sage < 0

* Dependents. Missing or negative counts are sent as 0, in line with the
* other inputs (TAXSIM needs a value for every input).
gen depx = kids
replace depx = 0 if depx == . | depx < 0

* Primary taxpayer gets the larger of the two earnings, secondary the smaller.
* Stata's max() and min() ignore missing arguments: with one earnings value
* missing, both would return the non-missing one and the same earnings would
* be entered twice. Missing earnings are therefore treated as 0 first.
gen pwages = max(cond(missing(ly), 0, ly), cond(missing(wly), 0, wly)) /* Primary taxpayer */
gen swages = min(cond(missing(ly), 0, ly), cond(missing(wly), 0, wly)) /* Secondary taxpayer */

* Asset income items are computed and then deliberately overwritten with 0
* (the final "NO ASSET INCOME" replace is what TAXSIM receives).
gen dividends = divi + diviw + trustinc  + trustincw + businc + busincw /* Treat: trusts, royalty income, business asset income as divis too */
replace dividends=0 if dividends==. | dividends<0 /* Should be none missing - all imputed post 2005 */
replace dividends=0 /*NO ASSET INCOME*/

gen intrec=interest + interestw /* Should be none missing - all imputed post 2005 */
replace intrec=0 if intrec==. | intrec<0
replace intrec=0 /*NO ASSET INCOME*/

gen stcg =0 /* Not including capital gains */

gen ltcg=0 /* Not including capital gains */

gen otherprop= renty + rentyw /* Should be none missing - all imputed post 2005 */
replace otherprop=0 if otherprop==. | otherprop<0
replace otherprop=0 /*NO ASSET INCOME*/

gen nonprop=0 /* Putting the transfers through together in transfers */

gen pensions=0 /* Looking at in work only */

gen gssi=soc_sec

replace gssi = 0 if gssi==. | gssi <0

gen ui=0 /* Putting all social security through gssi */

gen transfers=trhw
replace transfers = 0 if transfers==. | transfers <0

* Rent paid; missing or negative values are sent as 0
* (same treatment as the rent cleaning done after the TAXSIM merge)
gen rentpaid=rent /* Not including housing */
replace rentpaid = 0 if rentpaid == . | rentpaid < 0

gen proptax = prop_tax 
replace proptax = 0 if proptax==. | proptax<0 /* Should be none missing - all imputed post 2005 */

* otheritem is created here but is not in the keep list below, so it is not passed on
gen otheritem =0 /* Not using other itemized deductions */

replace childcare =0 if childcare==. | childcare<0 /* Should be none missing - all imputed post 2005 */

replace mortgage = 0  /* Could add mortgage interest and medical deductibles */

* Keep only variables to be uploaded to TAXSIM
keep taxsimid year state mstat page sage depx pwages swages dividends intrec stcg ltcg otherprop nonprop pensions gssi ui transfers rentpaid proptax childcare mortgage
order taxsimid year state mstat page sage depx pwages swages dividends intrec stcg ltcg otherprop nonprop pensions gssi ui transfers rentpaid proptax childcare mortgage 

taxsim27 

* Output to directory
save "$TAXSIM\TAXSIM_main.dta", replace
restore

* Rename the person identifiers and write an intermediate copy of the panel
rename person newid
rename sp_person sp_newid
save "Full_Panel_2a.dta", replace

use "Full_Panel_2a.dta", clear


* Attach the saved TAXSIM file by row identifier and report the match result
merge 1:1 taxsimid using "$TAXSIM\TAXSIM_main.dta"
tabulate _merge
drop _merge

* Total tax: fiitax + siitax + half of fica
* (these three come from the merged file - presumably TAXSIM outputs)
generate alltax = fiitax + siitax + fica/2

* Keep the existing post-tax income under a new name, then rebuild
* y_posttax as income net of alltax
rename y_posttax y_posttaxsim
generate y_posttax = y - alltax

* Add a housing imputation to rent paid:
* house values above 999999 are set to missing, missing or negative rent
* becomes 0, and rent is replaced by 6% of the house value where house > 0.
* NOTE(review): "house > 0" is also true when house is missing, so rent
* ends up missing for those records - confirm that this is the intended outcome.
replace house = . if house > 999999
replace rent = 0 if rent == . | rent < 0
replace rent = 0.06*house if house > 0

* Total spending across the listed categories (missing only if all are missing);
* blanked out before 1999. spending_miss counts how many categories are missing.
egen spending = rowtotal(totfood food_out health education util childcare transport rent homeins), missing
replace spending = . if year < 1999
egen spending_miss = rowmiss(totfood food_out health education util childcare transport rent homeins)

* Adjust dollar values by the price index (divide each variable by price).
* NOTE(review): if no variable is named exactly "taxsim", Stata's variable
* abbreviation could resolve it to another variable - verify the name.
foreach v of varlist tr_* trall_nonss troth uihead uiwife wcomp wcompw ///
        businc farmasset busincw gardasset roomerinc renty divi interest ///
        trustinc diviw interestw trustincw rentyw y nlyhw y_posttax ///
        y_posttaxsim taxsim lss_posttax alltax nonhy trhw wly ly tyoth ///
        soc_sec wages wagesw minwage spending totfood food_out health ///
        education util childcare transport rent homeins home_equity ///
        bus_ass cash real_estate stocks vehicles other_ass ira other_debt ///
        w_with_h w_no_h w_no_bus {
    replace `v' = `v'/price
}
* lc_impute is in logs, so deflating means subtracting log(price)
replace lc_impute = lc_impute - log(price)

* Log hourly wage from the deflated earnings, plus a nominal version obtained by adding log(price) back
gen lw = ln(ly/hours)
gen lw_nominal = lw + log(price)
gen sp_lw = ln(wly/sp_hours)
gen sp_lw_nominal = sp_lw + log(price)

**********
***FURTHER restrictions for the regression sample: self-employment, outlier wages / wage growth, and age.
***sample_select should be comparable to lpdata_withspouse_forhealthrisk.dta,
***except that it does not impose the education restriction (that restriction is currently disabled below).
***sample_select restricts to men not in the latino sample with non-missing data in some variables.
**********
* Analytic sample starts from the selected sample and is narrowed below
gen sample_analytic = sample_select

*don't consider people who are ever self-employed (any record with self==3)
egen sf=sum(self==3),by(newid)	
replace sample_analytic = 0 if sf>0	

***RESTRICTING OUTLIERS ON HEAD'S WAGE, OR LITTLE WORK:
* Wage growth below -lowlim or above highlim between consecutive records is treated as an outlier
scalar def lowlim		=0.85
scalar def highlim		=5
*****

*identifying outlier wage growth
* grly = proportional change in the nominal wage relative to the person's
* previous record (missing for the first record of each person)
sort newid year
quietly by newid: gen grly = (exp(lw_nominal)/exp(lw_nominal[_n-1])) - 1
replace grly=. if grly==-1

* scalar() forces the scalars to be used even if a variable in the data
* shares (or abbreviates to) the same name. Missing growth is never flagged.
gen suspect = grly < -scalar(lowlim) | (grly > scalar(highlim) & grly != .)
egen ss=sum(suspect),by(newid)				/*# of outlier wage growth records*/

* Count records with a wage below half the minimum wage.
* A missing minwage compares as larger than any number in Stata, so without
* the explicit guard every record with a valid wage and a missing minimum
* wage would be counted as "below" and the person dropped.
egen sw=sum((exp(lw)<0.5*minwage) & minwage<.),by(newid)	/*#of wages below 1/2 of the minimum wage*/
egen numy=sum(newid!=.),by(newid)			/*# of obs*/


replace sample_analytic = 0 if sw>0|numy<3|ss>0				/*Drop those with outlier wage growth, below 1/2 of mw, or with less than 3 obs*/
drop sf grly suspect ss sw numy
*The above REALLY drops guys in 2016. For some reason, a lot of new heads in 2016.
*NOTE(review): if minwage is missing for 2016, the old unguarded comparison may have contributed to this - re-check the 2016 counts.

**CHANGE JUNE 2023: no longer excluding higher educated people immediately. I will use them for some analyses***							
*replace sample_analytic = 0 if educ > 2
* Age restriction (a missing age also satisfies age>62 and is therefore excluded)
replace sample_analytic = 0 if age < 23 | age >62
 
* everSSI: person has at least one analytic-sample record with positive tr_ssi and DI==0
* everworkhealthy: person has at least one analytic-sample record with hours above 1500 and DS==0
* NOTE(review): the two flags condition on different variables (DI vs DS) - confirm both are intended.
bysort newid: egen everSSI=max(tr_ssi >0 & tr_ssi != . & DI ==0 & sample_analytic == 1)
gen emp = hours > 1500 & hours != . 
bysort newid: egen everworkhealthy=max(emp == 1 & DS == 0 & sample_analytic == 1)

* sample_health keeps the analytic sample as it stands before the two exclusions below
gen sample_health = sample_analytic

replace sample_analytic = 0 if everworkhealthy == 0 | everSSI == 1
drop emp
 
* Save the final panel, remove the intermediate file, and move to the programs directory
save "Full_Panel_2b.dta", replace

erase "Full_Panel_2a.dta"
cd "$programs"

